#!/usr/bin/env python

import BeautifulSoup

# Whitelist of tag names that sanitize_html leaves in the output.
VALID_TAGS = [
    'b', 'i', 'em', 'p',
    'ul', 'ol', 'li', 'br',
    'sub', 'sup', 'strike', 'img',
]
# Whitelist of attribute names a kept tag may carry.  'style' is left out
# on purpose because it looks like it can be abused.
VALID_ATTRS = [
    'id', 'class', 'src',
    'alt', 'href', 'title',
]

""" Sanitize HTML code, stips of all tags but the VALID ones """
def sanitize_html(value):
    """Strip every tag from `value` that is not explicitly whitelisted.

    The markup is parsed with BeautifulSoup and every tag is inspected.
    A tag is marked hidden when either

    * its name is not listed in VALID_TAGS, or
    * it carries at least one attribute whose name is not in VALID_ATTRS.

    A hidden tag stays in the tree: BeautifulSoup omits the tag's own
    markup when rendering, but its children are still rendered.

    NOTE(review): only attribute *names* are checked.  Attribute values
    (for example a ``javascript:`` URL in ``src``) pass through unchanged,
    and the text inside a hidden tag such as ``<script>`` is still part of
    the output -- confirm this is acceptable for untrusted input.

    :param value: HTML markup to sanitize.
    :returns: whatever ``soup.renderContents()`` produces for the
        filtered document.
    """
    soup = BeautifulSoup.BeautifulSoup(value)

    # findAll(True) matches every tag in the document, nested ones included.
    for tag in soup.findAll(True):
        # Each entry of tag.attrs is indexed as (name, value); one
        # disallowed attribute name is enough to hide the whole tag, so
        # any() can stop at the first offender.
        if tag.name not in VALID_TAGS or any(
                attr[0] not in VALID_ATTRS for attr in tag.attrs):
            tag.hidden = True
    return soup.renderContents()
